
Sr. RF Planning and Optimization Engineer
BSc Telecommunications Engineering, MS Data Science
F2017313014@umt.edu.pk
https://www.linkedin.com/in/engumersaeed/
https://github.com/umersaeed81h
14-Aug-2019


Chapter 11: Data Frame in Python
Dataframe is a main object in Pandas. It is used to represent data with rows and columns (tabular or Excel-spreadsheet-like data).
A data frame is a table or a two-dimensional array-like structure in which each column contains values of one variable and each row contains one set of values from each column.
Following are the characteristics of a data frame:
A fast and efficient DataFrame object for data manipulation with integrated indexing.
Tools for reading and writing data between in-memory data structures and different formats:
from IPython.display import YouTubeVideo
YouTubeVideo('CmorAWRsCAw',width=900, height=500)
from IPython.display import YouTubeVideo
YouTubeVideo('F6kmIpWWEdU',width=900, height=500)
import pandas as pd
import numpy as np
import os
import glob
from glob import glob
pd.__version__
pd.show_versions()
from IPython.display import YouTubeVideo
YouTubeVideo('-Ov1N1_FbP8',width=900, height=500)
There are many ways to do this, but my favorite ways are:
from IPython.display import YouTubeVideo
YouTubeVideo('3k0HbcUGErE',width=900, height=500)
weather=pd.DataFrame({
'Day':['1/1/2019','1/2/2019','1/3/2019','1/4/2019'],
'Temperature':[30,32,34,36],
'Windspeed':[6,7,10,12],
'Event':['Rain','Sunny','Rain','Sunny']
})
weather
weather=pd.DataFrame.from_dict({
'Day':['1/1/2019','1/2/2019','1/3/2019','1/4/2019'],
'Temperature':[30,32,34,36],
'Windspeed':[6,7,10,12],
'Event':['Rain','Sunny','Rain','Sunny']
})
weather
weather=pd.DataFrame([
('1/1/2019',30,6,'Rain'),
('1/2/2019',32,7,'Sunny'),
('1/3/2019',34,10,'Rain'),
('1/4/2019',36,12,'Sunny')],
columns=["Day","Temperature","Windspeed","Event"])
weather
sales = pd.DataFrame.from_records([('Jones LLC', 150, 200, 50),
('Alpha Co', 200, 210, 90),
('Blue Inc', 140, 215, 95)],
columns = ['account', 'Jan', 'Feb', 'Mar'])
sales
weather=pd.DataFrame([
{'Day':'1/1/2019','Temperature':30,'Windspeed':6,'Event':'Rain'},
{'Day':'1/2/2019','Temperature':32,'Windspeed':7,'Event':'Sunny'},
{'Day':'1/3/2019','Temperature':34,'Windspeed':8,'Event':'Sunny'},
{'Day':'1/4/2019','Temperature':36,'Windspeed':8,'Event':'Rain'}
])
weather
sales = pd.DataFrame.from_dict(
[{'account': 'Jones LLC', 'Jan': 150, 'Feb': 200, 'Mar': 140},
{'account': 'Alpha Co', 'Jan': 200, 'Feb': 210, 'Mar': 215},
{'account': 'Blue Inc', 'Jan': 50, 'Feb': 90, 'Mar': 95 }]
)
sales
sales = sales[['account', 'Jan', 'Feb', 'Mar']]
sales
from collections import OrderedDict
sales = pd.DataFrame.from_dict(
OrderedDict([('account', ['Jones LLC', 'Alpha Co', 'Blue Inc']),
('Jan', [150, 200, 50]),
('Feb', [200, 210, 90]),
('Mar', [140, 215, 95]) ])
)
sales
# pd.DataFrame.from_items was removed in pandas 1.0. Passing an OrderedDict
# (or a plain dict on Python 3.7+, where insertion order is preserved) to the
# DataFrame constructor builds the same column-ordered frame.
sales = pd.DataFrame(
    OrderedDict([('account', ['Jones LLC', 'Alpha Co', 'Blue Inc']),
                 ('Jan', [150, 200, 50]),
                 ('Feb', [200, 210, 90]),
                 ('Mar', [140, 215, 95])])
)
sales
df= pd.DataFrame(np.random.rand(4, 8))
df
df = pd.read_clipboard()
df
df=pd.DataFrame({'A' : []})
df
df = pd.DataFrame({'A' : [np.nan]})
df
df=df.empty
df
from IPython.display import YouTubeVideo
YouTubeVideo('5_QXMwezPJE',width=900, height=500)
We can import the following file formats in Pandas:
df= pd.read_csv('Weather.csv')
df
df= pd.read_csv('Weather.csv',skiprows=1)
df
df= pd.read_csv('Weather.csv',header=1)
df
df= pd.read_csv('Weather_header.csv')
df
df= pd.read_csv('Weather_header.csv',header=None)
df
df= pd.read_csv('Weather_header.csv',
names=['Date','Temperature',
'Windspeed','Event'])
df
df= pd.DataFrame(np.random.rand(4, 8),columns=list('abcdefgh'))
df
df= pd.read_csv('Weather1.csv',nrows=2)
df
df= pd.read_csv('Weather1.csv',usecols=['Date','Temperature'])
df
df= pd.read_csv('Weather1.csv',usecols=[0,3])
df
df= pd.read_csv('Family.csv')
df
df= pd.read_csv('Family.csv',skipfooter=1)
df
df= pd.read_csv('Family.csv',skipfooter=1,engine='python')
df
from IPython.display import YouTubeVideo
YouTubeVideo('0uBirYFhizE',width=900, height=500)
df= pd.read_csv('check.csv')
df
df.columns = ['Name', 'Student ID','Marks']
df
df= pd.read_csv('check.csv')
df.rename(columns={"Unnamed: 1":"Student ID"})
df = pd.DataFrame({'col one':[100, 200], 'col two':[300, 400]})
df
df.columns = df.columns.str.replace(' ', '_')
df
df = pd.DataFrame({'col one':[100, 200], 'col two':[300, 400]})
df.add_prefix('X_')
df = pd.DataFrame({'col one':[100, 200], 'col two':[300, 400]})
df.add_suffix('_Y')
df= pd.read_html('https://github.com/justmarkham/pandas-videos/blob/master/data/titanic_test.csv')
type(df)
df=df[0]
df.head()
df=df.iloc[:,1:]
df.head()
df = pd.read_table('chipotle.tsv')
df.head()
df = pd.read_csv('chipotle.tsv',sep='\t')
df.head()
df = pd.read_html('https://en.wikipedia.org/wiki/Pakistan',attrs={'class': 'wikitable'})
len(df)
df[0]
import pandas as pd
df = pd.read_html('https://en.wikipedia.org/wiki/Pakistan')
len(df)
df[10]
df = pd.read_html('https://en.wikipedia.org/wiki/Pakistan', attrs={"class":"sortable wikitable"})
len(df)
df[0]
df = pd.read_html('https://en.wikipedia.org/wiki/Pakistan', match='Corporations')
len(df)
df[0]
df = pd.read_html('https://en.wikipedia.org/wiki/List_of_UFC_events',attrs={"id":"Scheduled_events"})
len(df)
df[0].head()
df = pd.read_html('https://en.wikipedia.org/wiki/University_of_California,_Berkeley', attrs={"class":"infobox vcard"})
len(df)
df[0].head()
df = pd.read_html('https://en.wikipedia.org/wiki/University_of_California,_Berkeley', attrs={"class":"infobox"})
len(df)
df[0].head()
df = pd.read_html('https://www.esportsearnings.com/players',attrs={"class":"detail_list_table"},header=0)
len(df)
df[0].head()
df=pd.read_excel('WHO_ex.xlsx','WHO')
df.head()
df = pd.ExcelFile('Weather_subject.xlsx')
sheet_names = df.sheet_names
sheet_names
Weather = pd.read_excel(df, 'Weather')
student = pd.read_excel(df, 'Subject')
Weather
student
for tab in sheet_names:
print('################################## ' + tab + ' ##################################')
dfall = pd.read_excel(df, tab)
print(dfall)
df=pd.read_fwf('KPK_Weather.prn',colspecs = [(0,63),(63,76),(76,81),(81,92),(92,102)],parse_dates=["Day"])
df
import sys
!{sys.executable} -m pip install tabula-py
import tabula
from tabula import read_pdf
df = read_pdf("http://www.uncledavesenterprise.com/file/health/Food%20Calories%20List.pdf", pages='all', multiple_tables=True)
len(df)
df[4].head()
from IPython.display import YouTubeVideo
YouTubeVideo('-0NwrcZOKh',width=900, height=500)
We can export the following file formats in Pandas:
student=pd.DataFrame({
'Name':['Umer','Ali','Ahmed','Abdullah'],
'Marks':[100,98,97,95],
'Result':['Pass','Pass','Pass','Pass'],
'Subject':['Math','Phy','Chem','Bio']
})
student
weather=pd.DataFrame({
'day':['1/1/2019','1/2/2019','1/3/2019','1/4/2019'],
'temperature':[30,32,34,36],
'windspeed':[6,7,10,12],
'event':['Rain','Sunny','Rain','Sunny']
})
weather
student.to_csv('Output1.csv')
student.to_csv('output2.csv',index=False)
student.to_csv('output3.csv',
index=False,columns=['Name','Marks'])
student.to_csv('output4.csv',index=False,header=False)
student.to_excel("output5.xlsx",sheet_name="Student")
student.to_excel("output6.xlsx",sheet_name="Student",
startrow=3,startcol=3,index=False)
with pd.ExcelWriter('output7.xlsx') as writer:
weather.to_excel(writer,sheet_name="Weather",index=False)
student.to_excel(writer,sheet_name="Subject",index=False)
from IPython.display import YouTubeVideo
YouTubeVideo('M-4EpNdlSuY',width=900, height=500)
df= pd.read_csv('Family.csv',skipfooter=1,engine='python')
df
df.to_dict('series')
df.to_dict('split')
df.to_dict('index')
from collections import OrderedDict, defaultdict
df.to_dict(into=OrderedDict)
df.to_dict('records', into=defaultdict(list))
df= pd.read_csv('Family.csv',skipfooter=1,engine='python')
df
df.blocks
df= pd.read_csv('Family.csv',skipfooter=1,engine='python')
df
df.to_records()
df.to_records(index=False)
import pandas as pd
df= pd.read_csv('Family.csv',skipfooter=1,engine='python')
df
df.values
df.to_numpy()
df['Name'].array
df = pd.DataFrame({'a':[1,2,3], 'b':[4,5,6]})
df
df['a'].to_list()
df = pd.Series([[1, 2, 3], 'foo', [], [3, 4]])
df
df.explode()
df=pd.DataFrame({'A':[1,2],'B':[[1,2],[1,2]]})
df
df.explode('B')
df= pd.read_csv('Family.csv',skipfooter=1,engine='python')
df
df.get_values()
df_who= pd.read_csv('WHO_csv.csv')
df_who.head()
df_who.head(2)
df_who.tail()
df_who.tail(2)
df_who.sample(3)
df_who.shape
rows,columns= df_who.shape
print(rows,columns)
print("No of rows in the WHO Data Set:")
print(rows)
print("No of columns in the WHO Data Set:")
print(columns)
df_who.ndim
s = pd.Series({'a': 1, 'b': 2, 'c': 3})
s
s.ndim
df_who.size
s = pd.Series({'a': 1, 'b': 2, 'c': 3})
s
s.size
df_who.columns
df_who.keys()
df_who['Country'].name
df_who.index
df_who.axes
from IPython.display import YouTubeVideo
YouTubeVideo('XaCSdr7pPmY',width=900, height=500)
df_who.set_index('Region',inplace=True)
df_who
df_who= pd.read_csv('WHO_csv.csv')
df_who.set_index(['Region','Country'],inplace=True)
df_who
df_who.reset_index(['Region','Country'],inplace=True)
df_who.head()
df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
df
# set_axis's inplace parameter was deprecated and removed in pandas 2.0, and
# axis='rows' is not an accepted alias. Reassign the returned frame and use
# axis='index' / axis='columns'.
df = df.set_axis(['a', 'b', 'c'], axis='index')
df
df = df.set_axis(['A1', 'B1'], axis='columns')
df
from IPython.display import YouTubeVideo
YouTubeVideo('hSrDViyKWVk',width=900, height=500)
df_who= pd.read_csv('WHO_csv.csv')
df_who.dtypes
df_who['Country'].dtype
df_who['Country'].dtypes
df_who.get_dtype_counts()
df = pd.DataFrame({"A": ["a", 1, 2, 3]})
df
df = df.iloc[1:]
df
df.dtypes
df.infer_objects().dtypes
df = pd.DataFrame({"A": ["a", 1, 2, 3]})
df
df.dtypes
df.A.apply(type)
df.A.apply(type).value_counts()
from IPython.display import YouTubeVideo
YouTubeVideo('V0AWyzVMf54',width=900, height=500)
df = pd.DataFrame({'col_one':['1.1', '2.2', '3.3'],
'col_two':['4.4', '5.5', '6.6'],
'col_three':['7.7', '8.8', '-']})
df
df.dtypes
df.astype({'col_one':'float', 'col_two':'float'}).dtypes
pd.to_numeric(df.col_three, errors='coerce')
pd.to_numeric(df.col_three, errors='coerce').fillna(0)
df1=df.apply(pd.to_numeric, errors='coerce').fillna(0)
df1
df1.dtypes
import pandas as pd
df= pd.read_csv('Weather1.csv')
df
df.dtypes
df= pd.read_csv('Weather1.csv',dtype = {"Temperature" : "float64","Windspeed" : "float64"})
df
df.dtypes
df= pd.read_csv('Weather1.csv',parse_dates=['Date'],dtype = {"Temperature" : "float64","Windspeed" : "float64"})
df
df.dtypes
from IPython.display import YouTubeVideo
YouTubeVideo('P_q0tkYqvSk',width=900, height=500)
df_who.ftypes
df_who.get_ftype_counts()
df_who.select_dtypes(include='number').head()
df_who.select_dtypes(include='float64').head()
df_who.select_dtypes(include='int64').head()
df_who.select_dtypes(include='object').head()
df_who.select_dtypes(include=['int64', 'object']).head()
df_who.select_dtypes(exclude='number').head()
df= pd.read_csv("DateIssue.csv")
df
type(df.Date[0])
df= pd.read_csv("DateIssue.csv",parse_dates=['Date'])
df
type(df.Date[0])
from IPython.display import YouTubeVideo
YouTubeVideo('wDYDYGyN_cw',width=900, height=500)
df_who.memory_usage()
df_who.memory_usage(index=False)
df_who['Region'].astype('category').memory_usage(deep=True)
df_who['Region'].astype('category').memory_usage(deep=False)
df_who['Country'].nbytes
from IPython.display import YouTubeVideo
YouTubeVideo('fCMrO_VzeL8',width=900, height=500)
df_who= pd.read_csv('WHO_csv.csv')
df_who.info()
# df_who.info(verbose=True)
df_who.info(verbose=False)
df_who.isnull().head()
df_who.isnull().sum()
df_who.isna().head()
df_who.isna().sum()
df_who.notnull().head()
df_who.notnull().sum()
df_who.notna().head()
df_who.notna().sum()
df_who['PrimarySchoolEnrollmentMale'].hasnans
df_who['Region'].hasnans
df_who= pd.read_csv('WHO_csv.csv')
df_who.sum()
df_who.count()
df_who.max()
df_who.min()
df_who.mean()
df_who.median()
df = pd.DataFrame({'A': [2, 2, 1, 2, 1, 2, 3]})
df.mode()
df_who.std()
df_who.kurt()
df_who.kurtosis()
df_who.mad(axis = 0)
df_who.mad()
df_who.sem()
df_who.var()
df_who.skew()
df_who.cov()
from IPython.display import YouTubeVideo
YouTubeVideo('sCkS-0kIRCE',width=900, height=500)
df_who.corr(method='pearson')
Other correlation methods available in Pandas are:
df1 = pd.DataFrame({"A":[1, 5, 7, 8],
"B":[5, 8, 4, 3],
"C":[10, 4, 9, 3]})
df2 = pd.DataFrame({"A":[5, 3, 6, 4],
"B":[11, 2, 4, 3],
"C":[4, 3, 8, 5]})
df1.corrwith(df2, axis = 0)
df1.corrwith(df2, axis = 1)
df_who.quantile([0.25,0.50,0.75,0.100], numeric_only=True)
from IPython.display import YouTubeVideo
YouTubeVideo('PtO3t6ynH-8',width=900, height=500)
spl2= pd.read_csv('Scopus_1926-1950.csv',encoding='latin-1')
# Preprocessing
# Remove Space between Author(s)ID
spl2['Author(s)ID'] = spl2['Author(s)ID'].str.replace(" ","")
# Remove Author(s)ID end with ;
spl2['Author(s)ID'] = spl2['Author(s)ID'].str.rstrip(';')
spl2 = spl2.join(spl2['Author(s)ID'].str.split(';', expand=True).add_prefix('Author_'))
spl2.head()
spl2.keys()
spl2['AuthorCounts']=spl2.iloc[:,19:].count(axis=1)
spl2.head()
from IPython.display import YouTubeVideo
YouTubeVideo('B-r9VuK80dk',width=900, height=500)
df_who.describe()
df_who.describe(include='all')
df_who['Population'].describe()
df_who[['Population','Over60']].describe()
df_who.describe(include=['object'])
df_who.describe().loc['min':'max']
df_who.describe().loc['min':'max', 'Under15':'ChildMortality']
from IPython.display import YouTubeVideo
YouTubeVideo('EaHWjkEPHr8',width=900, height=500)
import sys
!{sys.executable} -m pip install pandas-profiling
Then, simply run the ProfileReport() function and pass it any DataFrame. It returns an interactive HTML report:
import pandas as pd
import pandas_profiling
df_who= pd.read_csv('WHO_csv.csv')
pandas_profiling.ProfileReport(df_who)
from IPython.display import YouTubeVideo
YouTubeVideo('zxqjeyKP2Tk',width=900, height=500)
from IPython.display import YouTubeVideo
YouTubeVideo('OYZNk7Z9s6I',width=900, height=500)
from IPython.display import YouTubeVideo
YouTubeVideo('xvpNA7bC8cs',width=900, height=500)
df_who= pd.read_csv('WHO_csv.csv')
df_who.head()
df_who.at[1, 'Country']
df_who.at[3, 'Population']=780
df_who.head()
df_who.loc[3].at['Population']
df_who= pd.read_csv('WHO_csv.csv')
df_who.head()
df_who.iat[0, 0]
df_who.iat[2, 3]=880
df_who.head()
df_who.loc[0].iat[0]
df_who = pd.read_csv('WHO_csv.csv')
df_who.head()
# .ix was removed in pandas 1.0. Use .loc for label-based access and .iloc for
# position-based access. With the default RangeIndex, .ix label slices were
# endpoint-inclusive, so loc[10:20] selects the same rows the original did.
df_who.loc[10:20, ['Country', 'Over60']]
df_who.loc[10:15, :]
# .ix[:4, 1:3] took rows by (inclusive) integer label and columns by position;
# iloc[:5, 1:3] matches that for the default RangeIndex.
df_who.iloc[:5, 1:3]
df_who.iloc[[1, 3, 5], [1, 8]]
df_who= pd.read_csv('WHO_csv.csv')
df_who.head()
df_who.filter(['Country', 'CellularSubscribers','Population']).head()
df_who.filter(regex ='[cC]').head()
df_who.filter(like ='Enr').head()
df_who= pd.read_csv('WHO_csv.csv')
df_who[:]
df_who[10:14]
df_who.iloc[10:14]
df_who.loc[[2,4,6]]
df_who.iloc[:,1:4]
df_who.iloc[14:25, 1:4]
df_who.Country
df_who['Country']
df_who[['Country', 'Region', 'Population']]
df_who.loc[1:4,['Country', 'Region', 'Population']]
df_who.loc[[2,4,50,52],['Country', 'Region', 'Population']]
df_who.head()
pd.concat([df_who.loc[:,'Country':'Under15'],df_who.loc[:,'FertilityRate'],df_who.loc[:,'ChildMortality':'GNI']],axis='columns').head()
df_who= pd.read_csv('WHO_csv.csv')
df_who[list(df_who.columns[0:3])+list(df_who.columns[5:7])+list(df_who.columns[8:11])].head()
df_who.iloc[:,np.r_[0:3,5,9:11]].head()
df_who.loc[df_who.Population>360000,['Country']]
df_who.loc[df_who.Population>360000,['Country','Population']]
df_who.loc[::-1]
df_who.loc[::-1].reset_index(drop=True)
df_who.loc[:, ::-1]
from IPython.display import YouTubeVideo
YouTubeVideo('egdfGJaBIh0',width=900, height=500)
df = pd.DataFrame({"A":[4, 5, 2, 6],
"B":[11, 2, 115, 8],
"C":[1, 8, 66, 4]})
df
#df.idxmax(axis=0)
df.idxmax()
#0='rows'
df.loc[df.idxmax(axis=0)]
df.idxmax(axis=1)
#1='column'
df = pd.DataFrame({"A":[4, 5, 2, 6],
"B":[11, 2, 115, 8],
"C":[1, 8, 66, 4]})
df
#df.idxmin(axis=0)
df.idxmin()
#0='rows'
df.idxmin(axis=1)
#1='colmun'
df = pd.DataFrame({
"A1":[np.NaN,np.NaN,np.NaN,5,6,np.NaN,np.NaN],
})
df
df.first_valid_index()
df = pd.DataFrame({
"A1":[np.NaN,np.NaN,np.NaN,5,6,np.NaN,np.NaN],
})
df
df.last_valid_index()
df=pd.read_csv('rep.csv')
df
df.T
df.transpose()
df=pd.read_csv('rep.csv')
df.xs(1)
df.xs('Name',axis=1)
df = pd.DataFrame({"A":[1, 2, 3, 4, 5],
"B":[10, 20, 30, 40, 50],
"C":[11, 22, 33, 44, 55],
"D":[12, 24, 51, 36, 2]})
df
df.slice_shift(2)
df.slice_shift(2, axis = 0)
df.slice_shift(2, axis = 1)
df.slice_shift(-2, axis = 1)
df = pd.DataFrame({'A': ['a', 'b', 'c', 'd', 'e'],
'B': ['f', 'g', 'h', 'i', 'j'],
'C': ['k', 'l', 'm', 'n', 'o']})
df
df.truncate(before=2, after=3)
df.truncate(before="A", after="B", axis="columns")
df['A'].truncate(before=2, after=4)
from IPython.display import YouTubeVideo
YouTubeVideo('2AFGPdNn4FM',width=900, height=500)
import pandas as pd
df_who= pd.read_csv('WHO_csv.csv')
df_who[df_who.Population>360000]
df_who[df_who['Population'].gt(360000)]
df_who[df_who.Population<360000]
df_who[df_who['Population'].lt(360000)]
df_who[df_who.Population==28541]
df_who[df_who['Population'].eq(28541)]
df_who[df_who.Population!=28541]
df_who[df_who['Population'].ne(28541)]
df_who[df_who.Population>=318000]
df_who[df_who['Population'].ge(318000)]
df_who[df_who.Population<=10000]
df_who[df_who['Population'].le(10000)]
df_who[df_who.Population==df_who.Population.max()]
df_who[df_who.Population==df_who['Population'].max()]
from IPython.display import YouTubeVideo
YouTubeVideo('YPItfQ87qjM',width=900, height=500)
df_who[(df_who.Population>=20000) & (df_who.LifeExpectancy==60)]
df_who[df_who['Population'].ge(20000)& (df_who['LifeExpectancy'].eq(60))]
df_who[df_who.Country.str.contains('istan')]
df_who[df_who['Country'].str.startswith('A')]
df_who[df_who['Country'].str.endswith('n')]
df_who[['Region','CellularSubscribers','Population']][df_who.Population==df_who['Population'].max()]
df_who.Region.unique()
df_who.Region.nunique()
df_who[(df_who.Region == 'Africa') |
(df_who.Region == 'Americas') |
(df_who.Region == 'South-East Asia')]
df_who[df_who.Region.isin(['Africa', 'Americas', 'South-East Asia'])]
df_who[~df_who.Region.isin(['Africa', 'Americas', 'South-East Asia'])]
from IPython.display import YouTubeVideo
YouTubeVideo('QTVTq8SPzxM',width=900, height=500)
df_who.Region.value_counts()
from IPython.display import YouTubeVideo
YouTubeVideo('15q-is8P_H4',width=900, height=500)
df_who.Region.value_counts().sort_index()
df_who.Region.value_counts(normalize=True).sort_index()
df_who= pd.read_csv('WHO_csv.csv')
df_who.head()
df_who.sort_index(axis=1, inplace=True)
df_who.head()
df_who.Region.value_counts().nlargest(n=3)
df_who.Region.value_counts().nlargest(n=3).index
df_who[df_who.Region.isin(df_who.Region.value_counts().nlargest(3).index)]
df_who[~df_who.Region.isin(df_who.Region.value_counts().nlargest(3).index)]
df_who.nlargest(3, 'Population')
df_who.nlargest(3, 'LifeExpectancy', keep='last')
df_who.nlargest(3, 'LifeExpectancy', keep='all')
df_who.Region.value_counts().nsmallest(3)
df_who.Region.value_counts().nsmallest(3).index
df_who[df_who.Region.isin(df_who.Region.value_counts().nsmallest(3).index)]
df_who[~df_who.Region.isin(df_who.Region.value_counts().nsmallest(3).index)]
df_who.nsmallest(3, 'Population')
df_who= pd.read_csv('WHO_csv.csv')
df_who[df_who['Population'].between(1, 500)]
df = pd.DataFrame({
'a': [4, 5, 6, 7],
'b': [10, 20, 30, 40],
'c': [100, 50, -30, -50]
})
df
df+1
df.add(1)
df + [1, 1,1]
df.add([1,1,1])
df.add([1,2,3], axis='columns')
df1 = pd.DataFrame({
'a': [4, 5, 6, 7],
'b': [10, 20, 30, 40],
'c': [100, 50, -30, -50]
})
df1
df2 = pd.DataFrame({
'a': [1, 2, 3, 4],
'b': [5, 6, 7, 8],
'c': [9, 10, 11, 12]
})
df2
df1+df2
df1.add(df2)
df = pd.DataFrame({
'a': [4, 5, 6, 7],
'b': [10, 20, 30, 40],
'c': [100, 50, -30, -50]
})
df
df-1
df.sub(1)
df.subtract(1)
df - [1, 1,1]
df.sub([1, 1,1])
df.subtract([1, 1,1])
df.sub([1, 2,3], axis='columns')
df.subtract([1, 2,3], axis='columns')
df1 = pd.DataFrame({
'a': [4, 5, 6, 7],
'b': [10, 20, 30, 40],
'c': [100, 50, -30, -50]
})
df1
df2 = pd.DataFrame({
'a': [1, 2, 3, 4],
'b': [5, 6, 7, 8],
'c': [9, 10, 11, 12]
})
df2
df1-df2
df1.sub(df2)
df1.subtract(df2)
df= pd.read_csv('egg_price.csv')
df
df= pd.read_csv('egg_price.csv',parse_dates=['Date'],index_col='Date')
df
#shift the Price of eggs previous day
df['previous_day']=df['Price'].shift(1)
df
#shift the Price of eggs next day
df['next_day']=df['Price'].shift(-1)
df
df['1daychange']=df['previous_day']-df['next_day']
df
df['5days%return']= (df['Price']-df['Price'].shift(5))*100/df['Price'].shift(5)
df
df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6],
'b': [1, 1, 2, 3, 5, 8],
'c': [1, 4, 9, 16, 25, 36]})
df
df.diff()
df.diff(axis=1)
df.diff(periods=3)
df.diff(periods=-1)
df = pd.DataFrame({
'a': [4, 5, 6, 7],
'b': [10, 20, 30, 40],
'c': [100, 50, -30, -50]
})
df
df*2
df.mul(2)
df.multiply(2)
df*[1,2,3]
df.mul([1,2,3])
df.multiply([1,2,3])
df = pd.DataFrame({
'a': [4, 5, 6, 7],
'b': [10, 20, 30, 40],
'c': [100, 50, -30, -50]
})
df
df.mul([1,2,3],axis='columns')
df.multiply([1,2,3],axis='columns')
df1 = pd.DataFrame({
'a': [4, 5, 6, 7],
'b': [10, 20, 30, 40],
'c': [100, 50, -30, -50]
})
df1
df2 = pd.DataFrame({
'a': [1, 2, 3, 4],
'b': [5, 6, 7, 8],
'c': [9, 10, 11, 12]
})
df2
df1*df2
df1.mul(df2)
df1.multiply(df2)
df1 = pd.DataFrame({"A":[1, 2],
"B":[3, 4]})
df1
df2 = pd.DataFrame({"A":[5, 6],
"B":[7, 8]})
df2
np.dot(df1, df2)
df1 = pd.Series([7, 5, 6, 4, 9])
df1
df2= pd.Series([1, 2, 3, 10, 2])
df2
(7)(1) + (5)(2) + (6)(3) + (4)(10)+ (9)(2) = 7 + 10 + 18 + 40 + 18 = 93
df1.dot(df2)
df = pd.DataFrame({"A":[5, 6],
"B":[7, 8]})
df
df.prod(axis = 0)
df.prod(axis = 1)
df.product(axis = 0)
df.product(axis = 1)
df = pd.DataFrame({"A":[2, 4],
"B":[6, 8]})
df
df/2
df.div(2)
df.divide(2)
df.rdiv(2)
#2/df
df = pd.DataFrame({"A":[2, 4],
"B":[6, 8]})
df
df/[2,4]
df.div([2,4])
df.divide([2,4])
df.rdiv([2,4])
df = pd.DataFrame({"A":[2, 4],
"B":[6, 8]})
df
df.div([2,4],axis='columns')
df.divide([2,4],axis='columns')
df.rdiv([2,4],axis='columns')
df1 = pd.DataFrame({"A":[1, 2],
"B":[3, 4]})
df1
df2 = pd.DataFrame({"A":[5, 6],
"B":[7, 8]})
df2
df1/df2
df1.div(df2)
df1.divide(df2)
df1.rdiv(df2)
df = pd.DataFrame({"A":[5, 3, None, 4],
"B":[None, 2, 4, 3],
"C":[4, 3, 8, 5],
"D":[5, 4, 2, None]})
df
df.floordiv(2, fill_value = 50)
df= pd.DataFrame({"A":[5, 3, None, 4],
"B":[None, 2, 4, 3],
"C":[4, 3, 7, 5],
"D":[5, 4, 2, None]})
df
df.mod(3)
df = pd.DataFrame({"A":[1, 2],
"B":[3, 4]})
df
df.pow(2)
df1 = pd.DataFrame({"A":[1, 2],
"B":[3, 4]})
df1
df2 = pd.DataFrame({"A":[5, 6],
"B":[7, 8]})
df2
df1.pow(df2)
df=pd.DataFrame({
'Temperature':[-30,32,-34,36],
'Windspeed':[6,7,10,-12]
})
df
df['Windspeed'] = df['Windspeed'].abs()
df
df=pd.DataFrame({
'Temperature':[-30,32,-34,36,1.2 + 1j],
'Windspeed':[6,7,10,-12,1.2 + 1j]
})
df
df=df.abs()
df
df.round(0)
df.round({'Temperature': 0, 'Windspeed': 1})
df_who= pd.read_csv('WHO_csv.csv')
df_who.head()
df_who['Over60']=df_who['Over60'].apply(np.ceil)
df_who.head()
df_who= pd.read_csv('WHO_csv.csv')
df_who.head()
df_who['Over60']=df_who['Over60'].apply(np.floor)
df_who.head()
df1 = pd.DataFrame({"A":[1, 2],
"B":[3, 4]})
df1
df2 = pd.DataFrame({"A":[1, 2],
"B":[3, 8]})
df2
df1.eq(df2)
df1 = pd.DataFrame({"A":[1, 2],
"B":[3, 4]})
df1
df2 = pd.DataFrame({"A":[1, 2],
"B":[3, 8]})
df2
df1.equals(df2)
df1 = pd.DataFrame({"A":[1, 2],
"B":[3, 4]})
df1
df2 = pd.DataFrame({"A":[1, 2],
"B":[3, 8]})
df1.ge(df2)
df1 = pd.DataFrame({"A":[1, 2],
"B":[3, 4]})
df1
df2 = pd.DataFrame({"A":[1, 2],
"B":[3, 8]})
df2
df1.gt(df2)
df1 = pd.DataFrame({"A":[1, 2],
"B":[3, 4]})
df1
df2 = pd.DataFrame({"A":[1, 2],
"B":[3, 8]})
df2
df1.le(df2)
df1 = pd.DataFrame({"A":[1, 2],
"B":[3, 4]})
df1
df2 = pd.DataFrame({"A":[1, 2],
"B":[3, 8]})
df2
df1.lt(df2)
df1 = pd.DataFrame({"A":[1, 2],
"B":[3, 4]})
df1
df2 = pd.DataFrame({"A":[1, 2],
"B":[3, 8]})
df2
df1.ne(df2)
df = pd.DataFrame()
df['x'] = [1,2,16]
df['y'] = [3,4,5]
df['z']=df['x'] < df['y']
df
df['z'].all()
df['z'].any()
df['z'].bool
df_who= pd.read_csv('WHO_csv.csv')
df_who.head()
from IPython.display import YouTubeVideo
YouTubeVideo('bofaC0IckHo',width=900, height=500)
df_who['Country']=df_who.Country.str.upper()
df_who.head()
df_who['Region']=df_who.Region.str.lower()
df_who.head()
df = pd.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe'])
df.str.capitalize()
df = pd.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe'])
df.str.title()
df = pd.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe'])
df.str.swapcase()
df_who['Country_Region']=df_who.Country+'_'+df_who.Region
df_who.head()
df_who= pd.read_csv('WHO_csv.csv')
df_who.head()
df_who['c_r']=df_who['Country'].str.cat(df_who['Region'], sep=',')
df_who.head()
df_who.insert(2,'Country_Region',df_who.Country+'_'+df_who.Region)
df_who.head()
df_who.insert(2,'Country_Region',df_who.Country+'_'+df_who.Region,allow_duplicates=True)
df_who.head()
df = pd.DataFrame({"A":[1, 2],
"B":[3, 4]})
df = df.eval('C=A + B')
df
df = pd.DataFrame({
'a': [4, 5, 6, 7],
'b': [10, 20, 30, 40],
'c': [100, 50, -30, -50]
})
df
df.assign(D=df['a']**2, E=df.b*2)
df = pd.DataFrame()
df['x'] = [11,2,16]
df['y'] = [3,4,15]
df.query('x > y')
df1=pd.DataFrame({
'ID':['F2017313014','F2017313015','F2017313017'],
'Marks':[98,97,99],
'Name':['Umer','Ali','Raza']
})
df1
df2=pd.DataFrame({
'ID':['F2017313014','F2017313018'],
'age':[20,22],
})
df2
df=pd.merge(df1,df2,on="ID",how="outer",indicator=True)
df
df=df.query('_merge != "both"')
df
from IPython.display import YouTubeVideo
YouTubeVideo('gnUKkS964WQ',width=900, height=500)
df_who.drop(['Country_Region'],axis=1,inplace=True)
df_who.head()
df_who.pop('Country')
df_who.head()
df_who= pd.read_csv('WHO_csv.csv')
df_who.drop(df_who.index[[0,2,4]],axis=0,inplace=True)
df_who.head()
from IPython.display import YouTubeVideo
YouTubeVideo('zY4doF6xSxY',width=900, height=500)
df_who.sort_values('Population')
df_who.sort_values('Population',ascending=True)
df_who.sort_values('Population',ascending=False)
df= pd.read_csv('radj.csv',header=0)
df
pd.DataFrame(np.sort(df.values, axis=1), index=df.index, columns=df.columns)
df=pd.read_csv("GTRX.txt",skiprows=1,sep=',')
df=df.sort_values(['Cell Name','Is Main BCCH TRX'],ascending=[True,False])
df.to_csv('output8.csv')
df = pd.DataFrame({
'a': [4, 5, 6, 7],
'b': [10, 20, 30, 40],
'c': [100, 50, -30, -50]
})
df
df.loc[(df.c - 43).abs().argsort()]
df_who["Rank"] = df_who["Population"].rank(ascending=True)
df_who.head()
df_who.sort_values("Population", inplace = True)
df_who.head()
df=pd.DataFrame({
'Day':[' 1/1/2019 ',' 1/2/2019 ',' 1/3/2019',' 1/4/2019'],
'Temperature':[30,32,34,36],
'Windspeed':[6,7,10,12],
'Event':[' Rain ',' Su nny ','Rain',' Sunny']
})
df
df[(df.Event=='Rain')]
df[(df.Day=='1/3/2019')]
df =df.applymap(lambda x: x.strip() if isinstance(x, str) else x)
df
df[(df.Event=='Rain')]
df[(df.Day=='1/3/2019')]
df=' hello world!'
df
df.lstrip()
df=pd.read_csv("GTRX.txt",skiprows=1)
def band(Frequency, threshold=100):
    """Classify a GSM/DCS radio channel by its frequency (ARFCN) number.

    Parameters
    ----------
    Frequency : numeric
        Channel number to classify.
    threshold : numeric, default 100
        Channels below this value are labelled 'GSM'; all others 'DCS'.
        The default separates the GSM-900 channel range from DCS-1800
        in this data set.

    Returns
    -------
    str
        'GSM' or 'DCS'.
    """
    return 'GSM' if Frequency < threshold else 'DCS'
df['Band']=df['Frequency'].apply(lambda x:band(x))
df.head()
df=pd.read_csv("GTRX.txt",skiprows=1)
df['Band'] = np.where(
((df['Frequency']>=25) & (df['Frequency']<=62)),
'GSM',
np.where(
(df['Frequency']>=556) & (df['Frequency']<=585),
'DCS',
'Other Band'))
df.head()
df=pd.read_csv("GTRX.txt",skiprows=1,sep=',')
df['CI'] = np.where(
(df['Cell Name'].str.startswith ('CI-')),
df['Cell Name'].str[8]+""+df['Cell Name'].str[3:7],
np.where(
(df['Cell Name'].str.startswith ('CII-')),
df['Cell Name'].str[9]+""+df['Cell Name'].str[4:8],
np.where(
(df['Cell Name'].str.startswith ('S-')),
df['Cell Name'].str[7]+""+df['Cell Name'].str[2:5],
np.where(
(df['Cell Name'].str.startswith ('N-')),
df['Cell Name'].str[7]+""+df['Cell Name'].str[2:5],
df['Cell Name'].str[:5]))))
df.head()
df_who= pd.read_csv('WHO_csv.csv')
df_who.Region.where(df_who.Region.isin(df_who.Region.value_counts().nlargest(3).index),other='Other').value_counts()
spl= pd.read_csv('Scopus_1926-1950.csv',encoding='latin-1')
# Preprocessing
# Remove Space between Author(s)ID
spl['Author(s)ID'] = spl['Author(s)ID'].str.replace(" ","")
# Remove Author(s)ID end with ;
spl['Author(s)ID'] = spl['Author(s)ID'].str.rstrip(';')
a=spl['Author(s)ID'].str.split(';', expand=True).add_prefix('A')
spl=pd.concat([spl, a], axis=1)
spl.head()
spl= pd.read_csv('Scopus_1926-1950.csv',encoding='latin-1')
# Preprocessing
# Remove Space between Author(s)ID
spl['Author(s)ID'] = spl['Author(s)ID'].str.replace(" ","")
# Remove Author(s)ID end with ;
spl['Author(s)ID'] = spl['Author(s)ID'].str.rstrip(';')
spl[['Author0', 'Author1', 'Author2','Author3','Author4']] = spl['Author(s)ID'].str.split(';', expand=True)
spl.head()
spl2= pd.read_csv('Scopus_1926-1950.csv',encoding='latin-1')
# Preprocessing
# Remove Space between Author(s)ID
spl2['Author(s)ID'] = spl2['Author(s)ID'].str.replace(" ","")
# Remove Author(s)ID end with ;
spl2['Author(s)ID'] = spl2['Author(s)ID'].str.rstrip(';')
spl2 = spl2.join(spl2['Author(s)ID'].str.split(';', expand=True).add_prefix('Author_'))
spl2.head()
spl1= pd.read_csv('Scopus_1926-1950.csv',encoding='latin-1')
# Preprocessing
# Remove Space between Author(s)ID
spl1['Author(s)ID'] = spl1['Author(s)ID'].str.replace(" ","")
# Remove Author(s)ID end with ;
spl1['Author(s)ID'] = spl1['Author(s)ID'].str.rstrip(';')
spl1['FAuthor'] = spl1['Author(s)ID'].str.split(';', expand=True)[0]
spl1.head()
Data = pd.DataFrame({'Identifier': ['55555-abc','77777-xyz','99999-mmm']})
Left = Data['Identifier'].str[:5]
Left
Data = pd.DataFrame({'Identifier': ['55555-abc','77777-xyz','99999-mmm']})
Right = Data['Identifier'].str[-3:]
Right
Data = pd.DataFrame({'Identifier': ['ID-55555-End','ID-77777-End','ID-99999-End']})
Mid = Data['Identifier'].str[3:8]
Mid
Data = pd.DataFrame({'Identifier': ['Umer-55555-H','Ali-77777-A','Ahmed-99999-S']})
BeforeSymbol = Data['Identifier'].str.split('-').str[0]
BeforeSymbol
Data = pd.DataFrame({'Identifier': ['111 IDAA','2222222 IDB','33 IDCCC']})
BeforeSpace = Data['Identifier'].str.split(' ').str[0]
BeforeSpace
Data = pd.DataFrame({'Identifier': ['IDAA-111','IDB-2222222','IDCCC-33']})
AfterSymbol = Data['Identifier'].str.split('-').str[1]
AfterSymbol
Data = pd.DataFrame({'Identifier': ['IDAA-111-AA','IDB-2222222-B','IDCCC-33-CCC']})
BetweenTwoSymbols = Data['Identifier'].str.split('-').str[1]
BetweenTwoSymbols
Data = pd.DataFrame({'Identifier': ['IDAA-111$AA','IDB-2222222$B','IDCCC-33$CCC']})
betweenTwoDifferentSymbols = Data['Identifier'].str.split('-').str[1]
betweenTwoDifferentSymbols = betweenTwoDifferentSymbols.str.split('$').str[0]
betweenTwoDifferentSymbols
from IPython.display import YouTubeVideo
YouTubeVideo('4R4WsDJ-KV',width=900, height=500)
df= pd.read_csv("Missing_Values_Handling.csv")
df
from IPython.display import YouTubeVideo
YouTubeVideo('EaGbS7eWSs0',width=900, height=500)

df.fillna(value=0)

df.fillna({
'Temperature':0,
'Windspeed':0,
'Event':'No Event'
})

df.fillna(method="ffill")

df.fillna(method="ffill",limit=1)

df.fillna(method="bfill")

df.fillna(method="bfill",limit=1)

df.fillna(method="ffill",axis="columns")

df.fillna(method="ffill",axis="columns",limit=1)

df.fillna(method="bfill",axis="columns")
df.fillna(method="bfill",axis="columns",limit=1)

df.interpolate()
# This is linear interpolate method
df
df.dropna()
df.dropna(axis=0)
df.dropna(axis=1)
df.set_index('Day',inplace=True)
df.dropna(axis=0,how='all')
df.dropna(thresh=2,axis=0)
df.dropna(thresh=len(df)*0.2, axis=0)
from IPython.display import YouTubeVideo
YouTubeVideo('XOxABiMhG2U',width=900, height=500)
df= pd.read_csv("Weather2.csv")
df
df= pd.read_csv('Weather2.csv',na_values=['Not Available',"n.a"])
df
df= pd.read_csv("Stocks.csv")
df
df= pd.read_csv("Stocks.csv",na_values=['not available',"n.a","n.a."])
df
df= pd.read_csv('Stocks.csv',na_values={'EPS':["not available","n.a","n.a."],
'Revenue':["not available","n.a","n.a.",-1],
'Price':["not available","n.a","n.a."],
'People':["not available","n.a","n.a."]
})
df
df=pd.read_excel("Stocks_ex.xlsx","Stocks")
df
def convert_people_cell(cell):
    """Excel-cell converter for the 'People' column.

    Replaces the placeholder string "n.a." with 'Sam Walton';
    every other value is passed through unchanged.
    Intended for use via read_excel(..., converters={'People': ...}).
    """
    # Only the exact sentinel "n.a." is substituted.
    if cell == "n.a.":
        return 'Sam Walton'
    return cell
df=pd.read_excel("Stocks_ex.xlsx","Stocks",converters={
'People':convert_people_cell
})
df
def convert_people_cell(cell):
    """Excel-cell converter for the 'People' column.

    Replaces the placeholder string "n.a." with 'Sam Walton';
    every other value is passed through unchanged.
    """
    # Only the exact sentinel "n.a." is substituted.
    if cell == "n.a.":
        return 'Sam Walton'
    return cell
def convert_revenue_cell(cell):
    """Excel-cell converter for the 'Revenue' column.

    Maps negative values to the string 'NaN'; non-negative values
    are passed through unchanged.

    NOTE(review): this returns the literal string 'NaN', not numpy.nan,
    so the resulting column stays object-typed — presumably intentional
    for display in this tutorial; confirm if numeric NaN is wanted.
    """
    # Negative revenue is treated as a missing-value marker.
    if cell < 0:
        return 'NaN'
    return cell
df=pd.read_excel("Stocks_ex.xlsx","Stocks",converters={
'People':convert_people_cell,
'Revenue':convert_revenue_cell
})
df
df= pd.read_csv('Replace_DS.csv')
df
df.replace(-999999,np.NaN)
df.replace({
-999999:np.NaN,
'No Event':np.NaN})
df= pd.read_csv('Replace_DS1.csv')
df
df.replace([-999999,-888888,'No Event'],np.NaN)
df= pd.read_csv('Replace_DS2.csv')
df
df.replace({
'Temperature':-999999,
'Windspeed':-888888,
'Event':'0'},np.NaN)
df= pd.read_csv('Replace_DS3.csv')
df
df.replace('[A-Za-z]','',regex=True)
df.replace({
'Temperature':'[A-Za-z]',
'Windspeed':'[A-Za-z]'},'',regex=True)
df = pd.DataFrame({"A":[12, 4, 5, 44, 1],
"B":[5, 2, 54, 3, 2],
"C":[20, 16, 7, 3, 8],
"D":[14, 3, 17, 2, 6]})
df
df.mask(df > 10, -25)
df = pd.DataFrame({"A":[12, 4, 5, None, 1],
"B":[7, 2, 54, 3, None],
"C":[20, 16, 11, 3, 8],
"D":[14, 3, None, 2, 6]})
df
df.mask(df.isna(), 1000)
df=pd.DataFrame({'col_0': [9, -3, 0, -1, 5], 'col_1': [-2, -7, 6, 8, -5]})
df
df.clip_lower(3)
df=pd.DataFrame({'col_0': [9, -3, 0, -1, 5], 'col_1': [-2, -7, 6, 8, -5]})
df
df.clip_upper(3)
df=pd.DataFrame({'col_0': [9, -3, 0, -1, 5], 'col_1': [-2, -7, 6, 8, -5]})
df
df.clip(-4, 6)
df1 = pd.DataFrame({'A': [0, 2], 'B': [4, 4]})
df1
df2 = pd.DataFrame({'A': [0, 1], 'B': [8, 8]})
df2
df1.combine(df2, lambda s1, s2: s1 if s1.sum() < s2.sum() else s2)
df1 =pd.Series([1, 2, 5, 6, 3, 7, 11, 0, 4])
df1
df2 =pd.Series([5, 3, 2, 1, 3, 9, 21, 3, 1])
df2
df1.combine(df2, (lambda x1, x2: x1 if x1 < x2 else x2))
series1 = pd.Series([70, 5, 0, 225, 1, 16, np.nan, 10, np.nan])
series2 = pd.Series([27, np.nan, 2, 23, 1, 95, 53, 10, 5])
series3 = series1.combine_first(series2)
series3
series4 = series2.combine_first(series1)
series4
df= pd.DataFrame({'A': ['a', 'b', 'c'],
'B': ['x', 'y', 'z']})
df
df1 = pd.DataFrame({'B': ['d', 'e', 'f', 'g', 'h', 'i']})
df1
df.update(df1)
df
df= pd.read_csv('List.csv')
df
df.sort_values("Score")
from pandas.api.types import CategoricalDtype
df['Score'] = df['Score'].astype(CategoricalDtype(categories=['Poor','Good','V Good','Exceptional'],ordered=True))
df
df.sort_values("Score")
df.loc[df.Score >= 'Good']
from IPython.display import YouTubeVideo
YouTubeVideo('0s_1IsROgDc',width=900, height=500)
pd.get_dummies(df, columns=['Score'])
df= pd.read_csv('Gender.csv')
df
df['Sex']=df.Gender.astype("category").cat.codes
df
df= pd.read_csv('Gender.csv')
df
df['Sex_num'] = df.Gender.map({'Female':0, 'Male':1})
df
df_who= pd.read_csv('WHO_csv.csv')
df_who['Reg_code']=df_who.Region.factorize()[0]
df_who.to_csv('test.csv')
df_who= pd.read_csv('WHO_csv.csv')
df_who.memory_usage()
df_who= pd.read_csv('WHO_csv.csv',dtype = {"Region" : "category"})
df_who.dtypes
df_who.memory_usage()
df_who= pd.read_csv('WHO_csv.csv',dtype = {"Region" : "category",'Country':'category'})
df_who.dtypes
df_who.memory_usage()
df= pd.read_csv('titanic.csv')
df.head()
df['cage']=pd.cut(df.Age, bins=[0, 18, 25, 99], labels=['child', 'young adult', 'adult'])
df.head()
df.tail()
df['cage'].dtypes
df= pd.read_csv('titanic.csv')
df['cage']=pd.qcut(df.Age,q=3)
df
df= pd.read_csv('titanic.csv')
df['cage']=pd.qcut(df.Age,q=3, labels=['child', 'young adult', 'adult'])
df
import pandas as pd
df= pd.qcut(range(5), 4)
df
df=pd.qcut(range(5), 3, labels=["good", "medium", "bad"])
df
df=pd.qcut(range(5), 4, labels=False)
df
from IPython.display import YouTubeVideo
YouTubeVideo('WGOEFok1szA',width=900, height=500)
df1= pd.read_csv('KPK_Weather.csv')
df1
df2= pd.read_csv('Punjab_Weather.csv')
df2
df=pd.concat([df1,df2])
df
df=pd.concat([df1,df2],ignore_index=True)
df
df=pd.concat([df1,df2],keys=['KPK','Punjab'])
df
df.loc['Punjab']
df.loc['KPK']
df1= pd.read_csv('WTemData.csv')
df1
df2= pd.read_csv('WWSData.csv')
df2
df=pd.concat([df1,df2])
df
df=pd.concat([df1,df2],axis=1)
df
df1=pd.DataFrame({
'City':['LHR','SHD','GJW','SKT'],
'temperature':[30,32,34,36]})
df1
df2=pd.DataFrame({
'City':['SKT','GJW','SHD','LHR'],
'windspeed':[7,8,9,10]})
df2
df=pd.concat([df1,df2],axis=1)
df
df1=pd.DataFrame({
'City':['LHR','SHD','GJW','SKT'],
'temperature':[30,32,34,36]},index=[0,1,2,3])
df1
df2=pd.DataFrame({
'City':['SKT','GJW','SHD','LHR'],
'windspeed':[7,8,9,10]},index=[3,2,1,0])
df2
df=pd.concat([df1,df2],axis=1)
df
from IPython.display import YouTubeVideo
YouTubeVideo('h4hOPGo4UVU',width=900, height=500)

df1=pd.DataFrame({
'City':['LHR','SHD','GJW','SKT'],
'temperature':[30,32,34,36]})
df1
df2=pd.DataFrame({
'City':['SKT','GJW','SHD','LHR'],
'windspeed':[7,8,9,10]})
df2
df=pd.merge(df1,df2,on="City")
df
df1= pd.read_csv('Family.csv',skipfooter=1,engine='python')
df1
df2= pd.read_csv('Math.txt',sep=",")
df2
df=pd.merge(df1,df2,on="ID")
df
df1= pd.read_csv('ListStu.txt',sep=",")
df1
df2= pd.read_csv('Listage.txt',sep=",")
df2
df=pd.merge(df1,df2,on="ID")
df
df=pd.merge(df1,df2,on="ID",how="inner")
df

df1= pd.read_csv('ListStu.txt',sep=",")
df1
df2= pd.read_csv('Listage.txt',sep=",")
df2
df=pd.merge(df1,df2,on="ID",how="outer")
df
df1= pd.read_csv('ListStu.txt',sep=",")
df1
df2= pd.read_csv('Listage.txt',sep=",")
df2
df=pd.merge(df1,df2,on="ID",how="left")
df

df1= pd.read_csv('ListStu.txt',sep=",")
df1
df2= pd.read_csv('Listage.txt',sep=",")
df2
df=pd.merge(df1,df2,on="ID",how="right")
df
import pandas as pd
df1= pd.read_csv('ListStu.txt',sep=",")
df1
df2= pd.read_csv('Listage.txt',sep=",")
df2
df=pd.merge(df1,df2,on="ID",how="outer",indicator=True)
df
df1=pd.DataFrame({
'ID':['F2017313014','F2017313015'],
'Marks':[98,97]
})
df1
df2=pd.DataFrame({
'ID':['F2017313014','F2017313015'],
'Marks':[96,95]
})
df2
df=pd.merge(df1,df2,on="ID")
df
df1=pd.DataFrame({
'ID':['F2017313014','F2017313015'],
'Marks':[98,97]
})
df1
df2=pd.DataFrame({
'ID':['F2017313014','F2017313015'],
'Marks':[96,95]
})
df2
df=pd.merge(df1,df2,on="ID",suffixes=('_Math','_Physics'))
df
df1=pd.DataFrame({
'ID':['F2017313014','F2017313015'],
'Marks':[98,97],
'Name':['Umer','Ali']
})
df1
df2=pd.DataFrame({
'ID':['F2017313014','F2017313015'],
'Marks':[96,95],
'Name':['Amir','Yaqoob']
})
df2
df=pd.merge(df1,df2,on="ID",suffixes=('_left','_right'))
df
df1 = pd.DataFrame({'key': list('aceace'), 'group': list('aaabbb'), 'lvalue': [1, 2, 3, 1, 2, 3]})[[ 'group','key', 'lvalue']]
df1
df2 = pd.DataFrame({'key': list('bcd'), 'rvalue': [1, 2, 3]})[[ 'rvalue','key']]
df2
pd.merge_ordered(df1, df2, left_by='group')
pd.merge_ordered(df1, df2, left_by='group',fill_method='ffill')
df1 = pd.DataFrame({'a': [1, 5, 10], 'left_val': ['a', 'b', 'c']})
df1
df2 = pd.DataFrame({'a': [1, 2, 3, 6, 7],
'right_val': [1, 2, 3, 6, 7]})
df2
pd.merge_asof(df1, df2, on='a')
pd.merge_asof(df1, df2, on='a', allow_exact_matches=True)
pd.merge_asof(df1, df2, on='a', allow_exact_matches=False)
df1 = pd.DataFrame([[1,2,3,4], [6,7,8,9]], columns=['D', 'B', 'E', 'A'])
df1
df2 = pd.DataFrame([[10,20,30,40], [60,70,80,90], [600,700,800,900]], columns=['A', 'B', 'C', 'D'])
df2
a1,a2=df1.align(df2, join='inner', axis=1)
a1
a2
a1.append(a2)
a1.append(a2,ignore_index=True)
import os
#import glob
from glob import glob
#all_files = glob.glob('Scopus*.csv')
all_files = glob('Scopus*.csv')
df_from_each_file = (pd.read_csv(f,encoding='latin-1').assign(fname=os.path.basename(f).split('.')[0]) for f in all_files)
concatdf = pd.concat(df_from_each_file, ignore_index=True)
concatdf[['Title','fname']].head()
scopus_files = sorted(glob('Scopus*.csv'))
scopus_files
pd.concat((pd.read_csv(file,encoding='latin-1') for file in scopus_files))
pd.concat((pd.read_csv(file,encoding='latin-1') for file in scopus_files), ignore_index=True)
student_files = sorted(glob('Student*.csv'))
student_files
pd.concat((pd.read_csv(file) for file in student_files), axis='columns')
from IPython.display import YouTubeVideo
YouTubeVideo('xPPs59pn6qU',width=900, height=500)
df= pd.read_csv('Weather3.csv')
df
df.pivot(index="Date",columns="City")
df.pivot(index="Date",columns="City",values='Temperature')
df= pd.read_csv('Weather4.csv')
df
df.pivot_table(index="City",columns="Date")
df.pivot_table(index="City",columns="Date",values='Temperature')
df.pivot_table(index="City",columns="Date",values='Temperature',aggfunc='sum')
df.pivot_table(index="City",columns="Date",values='Temperature',aggfunc='count')
df.pivot_table(index="City",columns="Date",values='Temperature',aggfunc='mean')
df.pivot_table(index="City",columns="Date",values='Temperature',margins=True,aggfunc='sum')
df.pivot_table(index="City",columns="Date",values='Temperature',margins=True)
df= pd.read_csv('Weather5.csv',parse_dates=['Date'])
df
df.pivot_table(index=pd.Grouper(freq='M',key='Date'),columns='City')
df.pivot_table(index=pd.Grouper(freq='M',key='Date'),columns='City',aggfunc='sum')
from IPython.display import YouTubeVideo
YouTubeVideo('oY62o-tBHF4',width=900, height=500)
df= pd.read_csv('Weather6.csv')
df
df1=pd.melt(df,id_vars=["Day"])
df1
df1=pd.melt(df,id_vars=["Day"],var_name="City", value_name='Temp')
df1
df1[df1["City"]=="Lahore"]
from IPython.display import YouTubeVideo
YouTubeVideo('I_kUj-MfYys',width=900, height=500)
df=pd.read_csv("Handedness.csv")
df
pd.crosstab(df.Nationality,df.Handedness)
pd.crosstab(df.Sex,df.Handedness)
pd.crosstab(df.Sex,df.Handedness,margins=True)
pd.crosstab(df.Sex,df.Handedness,margins=True,values=df.Age,aggfunc='mean')
pd.crosstab(df.Sex,df.Handedness,normalize='index')
pd.crosstab(df.Sex,[df.Handedness,df.Nationality],margins=True)
pd.crosstab([df.Sex,df.Nationality],df.Handedness,margins=True)
from IPython.display import YouTubeVideo
YouTubeVideo('qy0fDqoMJx8',width=900, height=500)
df= pd.read_csv('GroupBy.csv')
df
df.groupby('City').Temperature.sum()
df.groupby(['Province', 'City']).Temperature.sum()
df.groupby(['Province', 'City']).Temperature.sum().unstack()
df= pd.read_csv('GroupBy.csv')
df1=df.groupby('City')
df2=df1.get_group('Lahore')
df2
df2.mean()
df2.describe()
df = pd.read_csv('chipotle.tsv',sep='\t')
df.head()
df.dtypes
# Preprocessing (Step-1)
df['item_price'] = df['item_price'].str.replace("$","")
df.dtypes
# Preprocessing (Step-2)
df['item_price'] = pd.to_numeric(df['item_price'])
df.dtypes
df[df.order_id == 1].item_price.sum()
df.groupby('order_id').item_price.sum().head()
df.groupby('order_id').item_price.agg(['sum', 'count']).head()
len(df.groupby('order_id').item_price.sum())
Note that the aggregated output is smaller than the input to the function:
len(df.item_price)
df['total_price'] = df.groupby('order_id').item_price.transform('sum')
df.head()
df['percent_of_total'] = df.item_price / df.total_price
df = pd.DataFrame({"measure": ["a","a","b","a","c","c"],
"aq": [10,20,30,20,30,50]})
df
df["colour"] = (100.0 * df["aq"] /
df.groupby("measure")["aq"].transform(max))
df
from IPython.display import YouTubeVideo
YouTubeVideo('ht5buXUMqkQ',width=900, height=500)
df= pd.read_csv('DupDS.csv')
df
df.duplicated()
df.duplicated().sum()
df.loc[df.duplicated(),:]
df.loc[df.duplicated(keep='first'),:]
df.loc[df.duplicated(keep='last'),:]
df.loc[df.duplicated(keep=False),:]
df.drop_duplicates().shape
df.drop_duplicates(keep='first').shape
df.drop_duplicates(keep='last').shape
df.drop_duplicates(keep=False).shape
df.drop_duplicates()
df.drop_duplicates(keep='first')
df.drop_duplicates(keep='last')
df.drop_duplicates(keep=False)
from IPython.display import YouTubeVideo
YouTubeVideo('BUOy4RUUepg',width=900, height=500)
df=pd.read_excel("Company.xlsx",header=[0,1],index_col=[0])
df
df1=df.stack()
df1
df2=df.stack(level=0)
df2
df2.unstack()
df=pd.read_excel("Company1.xlsx",header=[0,1,2],index_col=[0])
df
df1=df.stack()
df1
df2=df.stack(level=0)
df2
df3=df.stack(level=1)
df3
df4=df.stack(level=2)
df4
df=pd.read_csv("rep.csv")
df
pd.concat([df]*3)
pd.concat([df]*3, ignore_index=True)
rep2 = df.loc[np.repeat(df.index.values,df.Rep)].reset_index(drop=True)
rep2
rep2 = rep2.drop("Rep",axis=1).reset_index(drop=True)
rep2
df= pd.read_csv('maybe.csv',header=0)
df
df['count'] = df.groupby('Year').cumcount() + 0
df
df = pd.DataFrame(np.arange(6).reshape(3,2), columns=['a', 'b'])
df
df.compound()
df = pd.Series([2, 10, np.nan, 4, 3, 0, 1])
df.cumprod()
df = pd.Series([2, np.nan, 5, -1, 0])
df
df.cummax()
df.cummax(skipna=False)
df = pd.DataFrame([[2.0, 1.0],
[3.0, np.nan],
[1.0, 0.0]],
columns=list('AB'))
df
df.cummax()
df.cummax(axis=1)
df = pd.Series([2, np.nan, 5, -1, 0])
df
df.cummin()
df.cummin(skipna=False)
df = pd.DataFrame([[2.0, 1.0],
[3.0, np.nan],
[1.0, 0.0]],
columns=list('AB'))
df.cummin()
df.cummin(axis=1)
df = pd.Series([2, np.nan, 5, -1, 0])
df
df.cumsum()
df.cumsum(skipna=False)
df = pd.DataFrame([[2.0, 1.0],
[3.0, np.nan],
[1.0, 0.0]],
columns=list('AB'))
df
df.cumsum()
df.cumsum(axis=1)
df= pd.read_csv('maybe.csv',header=0)
df
df.iat[2, 1]=1983
df
df['GD'] = (df['Year'] != df['Year'].shift(1)).astype(int).cumsum()
df['count'] = df.groupby('GD').cumcount() + 0
df
df = pd.DataFrame({'B': [0, 3, 2, 7, 4]})
df
df.expanding(2).sum()
df = pd.DataFrame({'B': [0, 3, 2, 7, 4]})
df
df.rolling(2).sum()
df= pd.read_csv('AAPL.csv',parse_dates=['Date'])
df
df['dchange']=100*df['Close'].pct_change()
df
from IPython.display import YouTubeVideo
YouTubeVideo('yiO43TQ4xvc',width=900, height=500)
df_who= pd.read_csv('WHO_csv.csv')
pd.get_option('display.max_rows')
df_who= pd.read_csv('WHO_csv.csv')
pd.set_option('display.max_rows',None)
df_who
pd.reset_option('all',silent=True)
import pandas as pd
df_who= pd.read_csv('WHO_csv.csv')
pd.set_option('display.max_rows',6)
df_who
pd.reset_option('display.max_rows')
df_who
#Method-2
#pd.reset_option('all',silent=True)
pd.reset_option('all',silent=True)
df_who= pd.read_csv('WHO_csv.csv')
pd.set_option('display.max_colwidth',10)
df_who.head()
pd.reset_option('all',silent=True)
df_who= pd.read_csv('WHO_csv.csv')
pd.set_option('display.precision',1)
df_who.head()
pd.reset_option('all',silent=True)
df_who= pd.read_csv('WHO_csv.csv')
pd.set_option('display.float_format','{:,}'.format)
df_who.head()
df= pd.read_csv('Sale.txt',sep=",",parse_dates=['Date'])
df
df.style.format({'Symbol': str.lower,'Volume':'{:,}','Date':'{:%m/%d/%y}','Close':'${:.2f}'})
(df.style.format({'Symbol': str.lower,'Volume':'{:,}','Date':'{:%m/%d/%y}','Close':'${:.2f}'})
.hide_index()
.highlight_min(['Close','Volume'], color='red')
.highlight_max('Close', color='lightgreen')
)
(df.style.format({'Symbol': str.lower,'Volume':'{:,}','Date':'{:%m/%d/%y}','Close':'${:.2f}'})
.hide_index()
.background_gradient(subset='Volume', cmap='Blues')
)
(df.style.format({'Symbol': str.lower,'Volume':'{:,}','Date':'{:%m/%d/%y}','Close':'${:.2f}'})
.hide_index()
.bar('Volume', color='lightblue', align='zero')
.set_caption('Stock Prices from October 2016')
)
from IPython.display import YouTubeVideo
YouTubeVideo('yCgJGsg0Xa4',width=900, height=500)
from IPython.display import YouTubeVideo
YouTubeVideo('r0s4slGHwzE',width=900, height=500)
df= pd.read_csv('AAPL.csv')
df.head()
type(df.Date[0])
df= pd.read_csv('AAPL.csv',parse_dates=['Date'])
df.head()
type(df.Date[0])
df= pd.read_csv('AAPL.csv',parse_dates=['Date'],index_col='Date')
df.head()
df.index
df['2018-01']
df['2018-01'].Close.mean()
df.loc['2018-01-29']
df['2018-01-15':'2018-01-20']
df['2018-01-15':'2018-01-20'].mean()
df['2018-01-15':'2018-01-20'].Close.mean()
df.resample('M').mean()
df['Close'].resample('M').mean()
from IPython.display import YouTubeVideo
YouTubeVideo('A9c7hGXQ5A8',width=900, height=500)
df= pd.read_csv('AAPLwd.csv')
df.head()
df1= pd.date_range(start="1/2/2018", end="1/31/2018",freq='B')
df1
df.set_index(df1,inplace=True)
df
df.asfreq('D',method='pad')
df.asfreq('W',method='pad')
df.asfreq('H',method='pad')
df= pd.date_range(start='1/1/2017',periods=72, freq='B')
df
ts=pd.Series(np.random.randint(1,10,len(df)),index=df)
ts
bd=pd.bdate_range(start='9/1/2019', end='9/16/2019')
bd
tr=pd.timedelta_range(start='1 day', periods=4)
tr
tr=pd.timedelta_range(start='1 day', periods=4,closed='right')
tr
tr=pd.timedelta_range(start='1 day', periods=4,closed='left')
tr
tr=pd.timedelta_range(start='1 day', end='2 days', freq='6H')
tr
ir=pd.interval_range(start=0, end=5)
ir
ir= pd.interval_range(start=pd.Timestamp('2017-01-01'),
end=pd.Timestamp('2017-01-04'))
ir
ir=pd.interval_range(start=0, periods=4, freq=1.5)
ir
ir= pd.interval_range(start=pd.Timestamp('2019-01-01'),
periods=3, freq='MS')
ir
ir=pd.interval_range(start=0, end=6, periods=4)
ir
ir=pd.interval_range(end=5, periods=4, closed='both')
ir
from IPython.display import YouTubeVideo
YouTubeVideo('Fo0IMzfcnQE',width=900, height=500)
df= pd.read_csv('AAPLus.csv')
df.head()
df1= pd.date_range(start="7/1/2018", end="7/31/2018",freq='B')
df1
from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay
usb=CustomBusinessDay(calendar=USFederalHolidayCalendar())
usb
df1= pd.date_range(start="7/1/2018", end="7/31/2018",freq=usb)
df1
df.set_index(df1,inplace=True)
df
df= pd.read_csv('AAPLpk.csv')
df
df1= pd.date_range(start="8/1/2018", end="8/31/2018",freq='B')
df1
'''
class USFederalHolidayCalendar(AbstractHolidayCalendar):
"""
US Federal Government Holiday Calendar based on rules specified by:
https://www.opm.gov/policy-data-oversight/
snow-dismissal-procedures/federal-holidays/
"""
rules = [
Holiday('New Years Day', month=1, day=1, observance=nearest_workday),
USMartinLutherKingJr,
USPresidentsDay,
USMemorialDay,
Holiday('July 4th', month=7, day=4, observance=nearest_workday),
USLaborDay,
USColumbusDay,
Holiday('Veterans Day', month=11, day=11, observance=nearest_workday),
USThanksgivingDay,
Holiday('Christmas', month=12, day=25, observance=nearest_workday)
]
'''
# Pakistan Holidays Calendar Code:
# Import Required Lib:
from pandas.tseries.holiday import AbstractHolidayCalendar, nearest_workday, Holiday
class PakistanFederalHolidayCalendar(AbstractHolidayCalendar):
    """
    Pakistan Federal Government Holiday Calendar based on rules specified by:
    https://publicholidays.pk/2018-dates/
    """
    # Single fixed-date rule: Independence Day falls on 14 August every year
    # (no observance shift — the holiday is taken on the actual date).
    rules = [
        Holiday('Independence Day', month=8, day=14)
    ]
pakc=CustomBusinessDay(calendar=PakistanFederalHolidayCalendar())
pakc
df1= pd.date_range(start="8/1/2018", end="8/31/2018",freq=pakc)
df1
df.set_index(df1,inplace=True)
df
df= pd.read_csv('AAPLusaeed.csv')
df
### Umer Saeed Birthday Calendar Code:
### Import Required Lib:
from pandas.tseries.holiday import AbstractHolidayCalendar, nearest_workday, Holiday
class UmerSaeedBirthdayCalendar(AbstractHolidayCalendar):
    """Custom holiday calendar with a single rule: Umer Saeed's birthday.

    The holiday is 7 January each year; `nearest_workday` shifts a
    Saturday occurrence to Friday and a Sunday occurrence to Monday.
    """
    rules = [
        Holiday('Umer Saeed Birthday', month=1, day=7, observance=nearest_workday),
    ]
ubc=CustomBusinessDay(calendar=UmerSaeedBirthdayCalendar())
ubc
df1= pd.date_range(start="1/1/2018", end="1/31/2018",freq=ubc)
df1
df.set_index(df1,inplace=True)
df
from IPython.display import Image
Image(filename='Rules.png')
ksa=CustomBusinessDay(weekmask='Sun Mon Tue Wed Thu')
df= pd.date_range(start="1/1/2018", end="1/31/2018",freq=ksa)
df
ksa=CustomBusinessDay(weekmask='Sun Mon Tue Wed Thu',holidays=['2018-01-08'])
df= pd.date_range(start="1/1/2018", end="1/31/2018",freq=ksa)
df
from IPython.display import YouTubeVideo
YouTubeVideo('igWjq3jtLYI',width=900, height=500)
from IPython.display import Image
Image(filename='date_format.png')
df= pd.read_csv('date_data.txt')
df
df.dtypes
df = df.apply(pd.to_datetime)
df
df.dtypes
df= pd.read_csv('date_data_time.txt')
df
df.dtypes
df = df.apply(pd.to_datetime)
df
from IPython.display import Image
Image(filename='Format_D_U_E.png')
pd.to_datetime('5/1/2018')
pd.to_datetime('5/1/2018',dayfirst=True)
pd.to_datetime('5$1$2018',format='%d$%m$%Y')
df=['2019-07-01','2019-07-02','2019-07-03','2019-07-04','2019-07-05','abc']
pd.to_datetime(df)
pd.to_datetime(df,errors='ignore')
pd.to_datetime(df,errors='coerce')
from IPython.display import Image
Image(filename='Epoch_Time.png')
t=1570434845
# t is in seconds
# unit : string, default 'ns'
df=pd.to_datetime([t],unit='s')
df
df.view('int64')
td = pd.Timedelta('3 days 06:05:01.000000111')
td
td.seconds
td = pd.Timedelta(133, unit ='s')
td
td.seconds
a= pd.Period('2019')
a
a.start_time
a.end_time
a=pd.Period('2019',freq='A')
a
a.start_time
a.end_time
a= pd.Period('2019')
a=a+1
a.start_time
a.end_time
a=pd.Period('2019',freq='A')
a=a+1
a
a.start_time
a.end_time
m=pd.Period('2019',freq='M')
m
m.start_time
m.end_time
m=pd.Period('2019-10')
m
m.start_time
m.end_time
m=pd.Period('2019',freq='M')
m=m+1
m
m.start_time
m.end_time
m=pd.Period('2019-12')
m=m+1
m
m.start_time
m.end_time
d=pd.Period('2019-10-07')
d
d.start_time
d.end_time
d=pd.Period('2019-10-07',freq='D')
d
d.start_time
d.end_time
d=pd.Period('2018-02-28')
d=d+1
d
d.start_time
d.end_time
d=pd.Period('2020-02-28',freq='D')
d=d+1
d
d.start_time
d.end_time
pd.Period('2018-03').is_leap_year
pd.Period('2018').is_leap_year
pd.Period('2020-03').is_leap_year
pd.Period('2020-10-10').is_leap_year
h=pd.Period('2020-02-28',freq='H')
h
h.start_time
h.end_time
h=pd.Period('2020-02-28 14:00:00',freq='H')
h
h.start_time
h.end_time
h=pd.Period('2020-02-28',freq='H')
h=h+1
h
h.start_time
h.end_time
h=pd.Period('2020-02-28 23:00:00',freq='H')
h=h+1
h
h.start_time
h.end_time
q=pd.Period('2018',freq='Q')
q
q.start_time
q.end_time
q=pd.Period('2018-4',freq='Q')
q
q.start_time
q.end_time
q=pd.Period('2018Q3',freq='Q')
q
q.start_time
q.end_time
q=pd.Period('2018',freq='Q')
q=q+1
q
q.start_time
q.end_time
q=pd.Period('2018-4',freq='Q')
q=q+1
q
q.start_time
q.end_time
q=pd.Period('2018Q3',freq='Q')
q=q+1
q
q.start_time
q.end_time
qc=pd.Period('2018', freq='Q-Jan')
qc
qc.start_time
qc.end_time
q1=pd.Period('2018', freq='Q-Jan')
q1.start_time
q1.end_time
q2=pd.Period('2016', freq='Q-Jan')
q2.start_time
q2.end_time
q3=q1-q2
q3
q1=pd.Period('2018', freq='Q-Nov')
q2=pd.Period('2016', freq='Q-Nov')
q3=q1-q2
q3
df= pd.read_csv('AAPL.csv',parse_dates=["Date"],index_col="Date")
df
df.to_period('Q')
df= pd.read_csv('AAPL.csv',parse_dates=['Date'])
df.head()
df["NEW_DATE"] = df['Date'] - pd.offsets.DateOffset(years=1)
df.head()
df= pd.read_csv('AAPL.csv',parse_dates=['Date'])
df["NEW_DATE"] = df['Date'] + pd.offsets.Day(10)
df.head()
df= pd.read_csv('AAPL.csv',parse_dates=['Date'])
df["NEW_DATE"] = df['Date'] + pd.offsets.BDay(10)
df.head()
h=pd.Period('2018-12-15 20:00:00',freq='H')
h=h+pd.offsets.Hour(1)
h
h.start_time
h.end_time
h=pd.Period('2018-12-15 20:00:00',freq='H')
h=h+pd.offsets.Hour(-22)
h
h.start_time
h.end_time
q=pd.Period('2018-5',freq='Q')
q.start_time
q.end_time
q=q.asfreq('M',how='start')
q.start_time
q.end_time
q=pd.Period('2018-5',freq='Q')
q.start_time
q.end_time
q=q.asfreq('M',how='end')
q.start_time
q.end_time
from IPython.display import YouTubeVideo
YouTubeVideo('3l9YOS4y24Y',width=900, height=500)
df=pd.period_range('2011','2018',freq='Q')
df
df=pd.period_range('2011','2018',freq='Q-Jan')
df
df[0].start_time
df[0].end_time
df= pd.read_csv('ind_data.csv')
df
df.set_index('idx',inplace=True)
df
df.index
df.index= pd.PeriodIndex(df.index,freq='Q')
df.index
df['2012']
df['2011':'2012']
df= pd.read_csv('ts_ds.csv')
df
from IPython.display import Image
Image(filename='Required_Output.png')
df.set_index('LineItem', inplace = True)
df
df=df.T
df
df.index
df.index= pd.PeriodIndex(df.index,freq='Q-Jan')
df['Start Date']=df.index.map(lambda x: x.start_time)
df['End Date']=df.index.map(lambda x: x.end_time.floor("H"))
df
# Day Name
df['DayName']=df['Start Date'].dt.weekday_name
# Get day of the month that a Period falls on
df['dayc']=df['Start Date'].dt.day
# Get Month Name
df['month']=df['Start Date'].dt.month
# Return the day of the year.
df['daynumber']=df['Start Date'].dt.dayofyear
# Get quarter Number
df['quarterernumb']=df['Start Date'].dt.quarter
# Day of the week the period lies in, with Monday=0 and Sunday=6.
df['dow']=df['Start Date'].dt.dayofweek
# Get the total number of days in the month that this period falls on.
df['dim']=df['Start Date'].dt.days_in_month
# year
df['yeartest']=df['Start Date'].dt.year
# Get the week of the year on the given Period.
df['weekn']=df['Start Date'].dt.week
df
from IPython.display import Image
Image(filename='TimeZone.png')
from IPython.display import YouTubeVideo
YouTubeVideo('9IW2GIJajLs',width=900, height=500)
from pytz import all_timezones
all_timezones
df=pd.read_csv('TZH.csv',parse_dates=['Date Time'])
df
df.set_index('Date Time', inplace = True)
df
df.index
df=df.tz_localize(tz='Asia/Istanbul')
df
df.index
df=df.tz_convert(tz='Asia/Karachi')
df
df.index
df=df.tz_convert(tz='dateutil/Europe/Berlin')
df
pakistan= pd.read_csv('Karachi.csv',parse_dates=['Date'])
pakistan.head()
pakistan.set_index('Date', inplace = True)
pakistan.head()
pakistan.index
pakistan=pakistan.tz_localize(tz='Asia/Karachi')
pakistan
pakistan.index
turki= pd.read_csv('Istanbul.csv',parse_dates=['Date'])
turki.head()
turki.set_index('Date', inplace = True)
turki.head()
turki.index
turki=turki.tz_localize(tz='Asia/Istanbul')
turki.index
turki
f_res=pakistan+turki
f_res
df= pd.read_csv('egg_price.csv',parse_dates=['Date'],index_col='Date')
df
df.tshift(1)
df.tshift(-1)
i = pd.date_range('2018-04-09', periods=4, freq='2D')
ts = pd.DataFrame({'A': [1,2,3,4]}, index=i)
ts
ts.first('3D')
Notice that data for the first 3 calendar days was returned, not the first 3 days observed in the dataset, and therefore data for 2018-04-13 was not returned.
i = pd.date_range('2018-04-09', periods=4, freq='2D')
ts = pd.DataFrame({'A': [1,2,3,4]}, index=i)
ts
ts.last('3D')
df= pd.read_csv('Cell_H_O.csv',index_col="Time",parse_dates=["Time"])
df.head()
df.between_time('6:00','8:00')
df= pd.read_csv('Cell_H.csv',index_col="Time",parse_dates=["Time"])
df.head()
df.between_time('6:00','8:00')
df= pd.read_csv('Cell_H_O.csv',index_col="Time",parse_dates=["Time"])
df.head()
df.at_time('6:00')
df= pd.read_csv('Cell_H.csv',index_col="Time",parse_dates=["Time"])
df.head()
df.at_time('6:00')
from IPython.display import YouTubeVideo
YouTubeVideo('oH3wYKvwpJ8',width=900, height=500)
from IPython.display import YouTubeVideo
YouTubeVideo('tcRGa2soc-c',width=900, height=500)
from IPython.display import YouTubeVideo
YouTubeVideo('-NbY7E9hKxk',width=900, height=500)
from IPython.display import YouTubeVideo
YouTubeVideo('te5JrSCW-LY',width=900, height=500)
from IPython.display import YouTubeVideo
YouTubeVideo('CWRKgBtZN18',width=900, height=500)
from IPython.display import YouTubeVideo
YouTubeVideo('RlIiVeig3hc',width=900, height=500)
from IPython.display import YouTubeVideo
YouTubeVideo('0lsmdNLNorY',width=900, height=500)
df_who= pd.read_csv('WHO_csv.csv',dtype = {"Region" : "category"})
df_who.head()
df_who['Country0']=df_who['Country'].str.get(0)
df_who.head()